knitr::opts_chunk$set(error = FALSE,
tidy = TRUE,
# tidy.opts = list(width.cutoff = 60),
message = FALSE,
warning = FALSE)
library(tidyverse)
library(readxl)
library(readr)
library(haven)
library(knitr)
library(kableExtra)
library(lubridate)
library(labelled)
library(gtsummary)
library(data.table)
# Working directory saved as `wd` and data path as `data_path`
wd <- getwd()
data_path <- paste(wd, "Data", sep = "/")
id_sid_matchup# Import & clean id_sid_matchup2.sas7bdat
id_sid_matchup2 <- read_sas(paste(data_path, "ids", "id_sid_matchup2.sas7bdat", sep = "/"))
id_sid_clean <- id_sid_matchup2 %>%
transmute(sid, newsid = as.character(newsid), shift_num = str_extract(shift_num_ampm,
"[:digit:]+"), date = d8, am_pm = str_extract(shift_num_ampm, "am|pm"), shift_start_dttm = startd8time,
shift_start_tm = ShiftStart, shift_end_dttm = ymd_hms(paste(date, ShiftEnd)),
shift_end_tm = ShiftEnd, staff = as_factor(staff))
# preview id_sid_match_cleaned
rmarkdown::paged_table(id_sid_clean)
# Save id_sid_clean to files in csv and Rdata format write_csv(id_sid_clean,
# path = paste(data_path, 'Rdata', 'id_sid_clean.Rdata', sep = '/'))
# write_csv(id_sid_clean, path = paste(data_path, 'csv_files',
# 'id_sid_clean.csv', sep = '/'))
id_finalids_final <- read_sas(paste(data_path, "ids", "finalsids6944.sas7bdat", sep = "/"))
# preview ids_final (no cleaning needed)
rmarkdown::paged_table(ids_final)
# Check if sids repeate within shifts:
ids_final %>%
group_by(shift_num) %>%
count(sid) %>%
filter(n > 1) %>%
rmarkdown::paged_table()
pt_sidspt_sids <- read_sas(paste(data_path, "ids", "patientSIDs_in_completepat.sas7bdat",
sep = "/"))
# preview unclean pt_sids glimpse(pt_sids)
pt_sid_clean <- pt_sids %>%
transmute(sid, shift_num = str_extract(shift_num_ampm, "[:digit:]+"), shift_ampm = as_factor(str_extract(shift_num_ampm,
"am|pm")))
# preview pt_sid_clean
rmarkdown::paged_table(pt_sid_clean)
pt_sid_clean %>%
group_by(shift_num) %>%
count(shift_ampm) %>%
arrange(shift_num) %>%
rmarkdown::paged_table()
pt_sid_clean %>%
count(shift_ampm) %>%
rmarkdown::paged_table()
# Save to file write_rds(pt_sid_clean, path = paste(data_path, 'Rdata',
# 'pt_sid_clean.Rdata', sep = '/')) write_csv(pt_sid_clean, path =
# paste(data_path, 'csv_files', 'pt_sid_clean.csv', sep = '/'))
# generate_dictionary(pt_sid_clean)
##staff_sids
staff_sids <- read_sas(paste(data_path, "ids", "staffsids_in_eventfile.sas7bdat",
sep = "/"))
str(staff_sids)
## tibble [95 × 5] (S3: tbl_df/tbl/data.frame)
## $ sid : chr [1:95] "0002f35c" "0002f43c" "0002f445" "0002f447" ...
## $ job : chr [1:95] "" "" "" "" ...
## $ job_SAH : chr [1:95] "STAFF" "RN" "STAFF" "RN" ...
## $ ParticipantCat4: chr [1:95] "STAFF" "RN" "STAFF" "RN" ...
## ..- attr(*, "label")= chr "MD/RN/Staff/Patient"
## $ JOBTITLE : chr [1:95] "STAFF" "RN" "STAFF" "RN" ...
staff_sids_clean <- staff_sids %>%
mutate(across(.cols = !sid, .fns = as.factor)) %>%
select(-job)
rmarkdown::paged_table(staff_sids_clean)
staff_sids_clean %>%
count(ParticipantCat4) %>%
rmarkdown::paged_table()
# Save to file write_csv(staff_sids_clean, file = paste(data_path, 'Rdata',
# 'staff_sids_clean.Rdata', sep = '/')) write_csv(staff_sids_clean, file =
# paste(data_path, 'csv_files', 'staff_sids_clean.csv', sep = '/'))
# generate_dictionary(staff_sids_clean)
allstaffallstaff <- read_sas(paste(data_path, "staff", "allstaff_numbyshift.sas7bdat", sep = "/"))
# str(allstaff)
allstaff_clean <- allstaff %>%
mutate(job_title = as_factor(JOBTITLE), .keep = "unused")
# write_rds(allstaff_clean, paste(data_path, 'Rdata', 'allstaff_clean.Rdata',
# sep = '/')) write_csv(allstaff_clean, paste(data_path, 'csv_files',
# 'allstaff_clean.csv', sep = '/'))
rmarkdown::paged_table(allstaff_clean)
# generate_dictionary(allstaff_clean)
ntab2ntab2 <- read_csv(file = paste(data_path, "ids", "nTab2.csv", sep = "/"))
# glimpse(ntab2)
ntab2_clean <- ntab2 %>%
transmute(observation = obs, sid = sids, id = as.character(ID), date = dmy(D8),
shift_num = shiftnum, shift_person = person, staff = as_factor(staff), degree,
wdegree, WtAlpha)
rmarkdown::paged_table(ntab2_clean)
# save to file write_rds(ntab2_clean, file = paste(data_path, 'Rdata',
# 'ntab2_clean.Rdata', sep = '/')) write_csv(ntab2_clean, file =
# paste(data_path, 'csv_files', 'ntab2_clean.csv', sep = '/'))
# generate_dictionary(ntab2_clean)
ntab2 lists 6944 observations inclusive of all shifts. These observations list up to - individuals per shift, all 81 shifts.
shift_timesshift_times <- read_xls(paste(data_path, "shifts", "Shift Start End times v3.xls",
sep = "/"), col_names = c("date", "shift_start_tm", "shift_end_tm", "shift_ampm",
"comments"), col_types = c("date", "guess", "guess", "text", "text", "skip",
"skip", "skip", "skip", "skip", "skip"), skip = 1)
# shift_times %>% str()
rmarkdown::paged_table(shift_times)
# generate_dictionary(shift_times) write_csv(shift_times, file =
# paste(data_path, 'csv_files', 'shift_times.csv', sep = '/'))
# write_rds(shift_times, file = paste(data_path, 'Rdata', 'shift_times.Rdata',
# sep = '/'))
counts_per_shiftcounts_per_shift <- read_sas(paste(data_path, "shifts", "countspershift.sas7bdat",
sep = "/"))
# str(counts_per_shift)
countspershift_clean <- counts_per_shift %>%
transmute(shift_num, date = d8, am_pm = str_extract(shift_num_ampm, "am|pm"),
staffcount = as.integer(staffcount), patcount = as.integer(patcount), total = as.integer(total))
rmarkdown::paged_table(countspershift_clean)
# write_rds(countspershift_clean, file = paste(data_path, 'Rdata',
# 'countspershift_clean.Rdata', sep = '/')) write_csv(countspershift_clean,
# file = paste(data_path, 'csv_files', 'countspershift_clean.csv', sep = '/'))
person_datasetperson_dataset <- read_sas(paste(data_path, "patients", "person_dataset.sas7bdat",
sep = "/"))
person_dataset <- person_dataset %>%
transmute(shift_num = as.integer(shiftnum), person = as.character(person), staff = as_factor(staff),
degree, minutes)
rmarkdown::paged_table(person_dataset)
# generate_dictionary(person_dataset) write_rds(person_dataset,
# paste(data_path, 'Rdata', 'person_dataset.Rdata', sep = '/'))
# write_csv(person_dataset, paste(data_path, 'csv_files', 'person_dataset.csv',
# sep = '/'))
location_categorieslocation_cats <- read_sas(paste(data_path, "rooms", "location_categories_list.sas7bdat",
sep = "/"))
rmarkdown::paged_table(location_cats)
location_defslocation_defs <- read_sas(paste(data_path, "rooms", "location_definitions.sas7bdat",
sep = "/"))
rmarkdown::paged_table(location_defs)
# generate_dictionary(location_defs)
room_locsroom_locs <- read_xlsx(paste(data_path, "rooms", "Room Locations and Square Footage - with Correction.xlsx",
sep = "/"), col_names = c("id", "name", "type", "sqft_area"), col_types = c("text",
"text", "text", "numeric", "skip"), skip = 1)
rmarkdown::paged_table(room_locs)
locations_listlocations_list <- read_sas(paste(data_path, "rooms", "locationslist.sas7bdat", sep = "/"))
rmarkdown::paged_table(locations_list)
Demographic information with med_rec numbers and encounter numbers. Note that med_rec numbers may repeat!
demographics_dfdemographic_df <- read_csv(paste(data_path, "patients", "Population_Patient_Demographics.csv", sep = "/"))
# glimpse(demographic_df)
demographics_clean <- demographic_df %>%
transmute(med_rec_no = as.character(`Medical Record Number`),
encounter_no = as.character(`Enc .`),
arrival_date = mdy(`ED Arrival Timestamp`),
departure_date = mdy(`ED Departure Timestamp`),
# across(!where(is.Date)
# & !where(is.double)
# & !where(is.integer)
# & !med_rec_no
# & !encounter_no
# & !`ED Arrival Timestamp`
# & !`ED Departure Timestamp`
# & !`Chief Complaint`
# & !`% ED Patients Admitted as Inpt or Obs`,
# .fns = as_factor),
sex = as_factor(Sex, ),
age = `Patient Age at Visit`,
race = as_factor(Race),
chief_complaint = `Chief Complaint`,
acuity = as_factor(Acuity),
arrival_mode = as_factor(`Arr Mode`),
disposition = as_factor(`ED Disposition`)) %>%
# str()
# demographics_clean_test %>%
mutate(across(where(is.factor), ~recode(.x, `Not Recorded` = NA_character_, `Unknown` = NA_character_))) %>%
mutate(acuity = fct_relevel(acuity,
"1 Immediate",
"2 Emergent",
"3 Urgent",
"4 Stable",
"5 Non Urgent"),
arrival_cat = fct_recode(arrival_mode,
"EMS" = "EMS Ground",
"EMS" = "EMS Air",
"Custody" = "Police/Jail",
"Ambulatory" = "Private Vehicle",
"Ambulatory" = "Public Trans",
"Ambulatory" = "Walk"),
race = fct_relevel(race,
"Black",
"White",
"Hispanic",
"Asian",
"Other",
"1",
"2",
"3"), .after = 10)
# generate_dictionary(demographics_clean_test)
# generate_dictionary(demographics_clean)
rmarkdown::paged_table(demographics_clean)
demographics_clean %>%
count(race) %>%
rmarkdown::paged_table()
demographics_clean %>%
group_by(acuity) %>%
summarise(`Total (n)` = n(),
`Black (n)` = sum(race == "Black", na.rm = TRUE),
`Black (%)` = round((`Black (n)`/`Total (n)`)*100, 2),
`White (n)` = sum(race == "White", na.rm = TRUE),
`White (%)` = round((`White (n)`/`Total (n)`)*100, 2),
`Hispanic (n)` = sum(race == "Hispanic", na.rm = TRUE),
`Hispanic (%)` = round((`Hispanic (n)`/`Total (n)`) * 100, 2),
`Other (n)` = sum(race == "Other", na.rm = TRUE),
`Other (%)` = round((`Other (n)`/`Total (n)`) * 100, 2),
`Missing (n)` = sum(is.na(race)),
`Missing (%)` = round((`Missing (n)`/`Total (n)`) * 100, 2)) %>%
rename(Acuity = acuity) %>%
rmarkdown::paged_table()
# write_rds(demographics_clean, paste(data_path, "Rdata", "demographics_clean.Rdata", sep = "/"))
# write_csv(demographics_clean, paste(data_path, "csv_files", "demographics_clean.csv", sep = "/"))
pop_pt_demographicspop_pt_demographics <- read_csv(paste(data_path, "patients", "Population_Patient_Demographics.csv",
sep = "/"))
nms <- c("arrival_date", "med_rec_no", "encounter_no", "sex", "age", "race", "chief_complaint",
"acuity", "arrival_mode", "departure_date", "disposition", "prop_admitted")
names(pop_pt_demographics) <- nms
# str(pop_pt_demographics) generate_dictionary(pop_pt_demographics)
pop_pt_demo_clean <- pop_pt_demographics %>%
mutate(med_rec_no = as.character(med_rec_no), encounter_no = as.character(encounter_no),
arrival_date = mdy(arrival_date), .after = 3, departure_date = mdy(departure_date),
sex = as_factor(sex), race = as_factor(race), acuity = as_factor(acuity),
arrival_mode = as_factor(arrival_mode)) %>%
mutate(across(where(is.factor), ~recode(.x, `Not Recorded` = NA_character_, Unknown = NA_character_))) %>%
mutate(acuity = fct_relevel(acuity, "1 Immediate", "2 Emergent", "3 Urgent",
"4 Stable", "5 Non Urgent"), arrival_cat = fct_recode(arrival_mode, EMS = "EMS Ground",
EMS = "EMS Air", Custody = "Police/Jail", Ambulatory = "Private Vehicle",
Ambulatory = "Public Trans", Ambulatory = "Walk"), .after = 9, race = fct_relevel(race,
"Black", "White", "Hispanic", "Asian", "Other", "1", "2", "3"))
rmarkdown::paged_table(pop_pt_demo_clean)
# generate_dictionary(pop_pt_demo_clean) write_csv(pop_pt_demo_clean, file =
# paste(data_path, 'csv_files', 'pop_pt_demo_clean.csv', sep = '/'))
# write_rds(pop_pt_demo_clean, file = paste(data_path, 'Rdata',
# 'pop_pt_demo_clean.Rdata', sep = '/'))
pts_populationpts_population <- read_sas(paste(data_path, "patients", "Patients_population.sas7bdat",
sep = "/"))
rmarkdown::paged_table(pts_population)
# generate_dictionary(pts_population) write_csv(pts_population, file =
# paste(data_path, 'csv_files', 'pts_population.csv', sep = '/'))
# write_rds(pts_population, file = paste(data_path, 'Rdata',
# 'pts_population.Rdata', sep = '/'))
pts_pop2pts_pop2 <- read_sas(paste(data_path, "patients", "patients_population2.sas7bdat",
sep = "/"))
rmarkdown::paged_table(pts_pop2)
# generate_dictionary(pts_pop2)
# write_csv(pts_pop2, file = paste(data_path, 'csv_files', 'pts_pop2.csv', sep
# = '/')) write_rds(pts_pop2, file = paste(data_path, 'Rdata',
# 'pts_pop2.Rdata', sep = '/'))
“completepat.sas7bdat” contains RFID badge locations (by room number) for each sampled patient for every second they were monitored in the ED.
The file is large, so subset a random sample of observations with
slice_sample() to test code for wrangling.
Use pivot_longer() to reshape from wide format to long
format by collapsing all location-by-second columns (variables
flocX$, where X is the shift time in seconds
that the respective SID was identified in room number
flocX) into two columns, names set to seconds
and values to location.
Note that
pivot_longer()will create many duplicate SIDs and manyNAs which will require filtering.
pt_completept_complete <- read_sas(paste(data_path, "patients", "completepat.sas7bdat", sep = "/"))
# glimpse(pt_complete)
# 1. subset a random sample of 30 observations for data transformation code preparation
# set.seed(711)
# pt_complete_sample <- pt_complete %>% slice_sample(n = 30)
# pt_complete_sample %>%
pt_complete_long <- pt_complete %>%
# 2. Pivot the data.frame from wide to long by placing all column names that start with "floc" into a new column, "seconds," and placing respective observations for each "floc" variable into a "location_num" column
pivot_longer(cols = starts_with("floc"),
names_to = "shift_second", # moves column names to new column "seconds"
values_to = "location_num", # moves location numbers to new column "location_num"
values_drop_na = TRUE) %>% # removes all rows (observations) without location data
# 3. Delete the prefix "floc" from `seconds` and change class to numeric
mutate(shift_second = as.double(str_replace(shift_second, "floc", "")),
location_num = as_factor(location_num),
shift_num_ampm = str_trim(shift_num_ampm),
am_pm = as_factor(str_extract(shift_num_ampm, "am|pm")),
shift_num = as.integer(str_extract(shift_num_ampm, "[:digit:]+")),
date = make_date(year = year, month = mon, day = day)) %>%
select(sid, date, shift_num, am_pm, location_num, shift_second) %>%
# count(am_pm)
mutate(day_night = as_factor(if_else(am_pm == "am", "day", "night")),
.after = 4) # %>%
# glimpse()
# Save as .Rdata file
rmarkdown::paged_table(pt_complete_long)
# write_rds(pt_complete_long, file = paste(data_path, "Rdata", "pt_complete_long.Rdata", sep = "/"))
# write_csv(pt_complete_long, file = paste(data_path, "csv_files", "pt_complete_long.csv", sep = "/"))
# generate_dictionary(pt_complete_long)
After transforming & cleaning “completepat.sas7bdat”,
pt_complete_longis 7 variables wide and 61342625 rows long.
The SAS data file, “completestaff.sas7bdat,” contains RFID badge location (by room number) for all staff each second of every shift.
staff_complete# 1. pull 'completestaff.sas7bdat' from `sas_data_list` as `tibble()`
staff_complete <- read_sas(paste(data_path, "staff", "completestaff.sas7bdat", sep = "/"))
# Subset a random sample of 30 observations set.seed(321) staff_complete_sample
# <- staff_complete %>% slice_sample(n = 30) 2. Use a random sample of
# staff_complete for data cleaning & transformation
staff_complete_long <- staff_complete %>%
# 2a. Pivot the data.frame from wide to long by placing all column names
# that start with 'floc' into a new column, 'seconds,' and placing
# respective observations for each 'floc' variable into a 'location_num'
# column
pivot_longer(cols = starts_with("floc"), names_to = "seconds", values_to = "location_num",
values_drop_na = TRUE) %>%
# 2b. Remove the prefix 'floc' from `time_seconds` and keep the digits as
# `seconds`
mutate(seconds = as.double(str_replace(seconds, "floc", "")), location_num = as_factor(location_num),
shift_num_ampm = str_trim(shift_num_ampm), shift_num = as.integer(str_extract(shift_num_ampm,
"[:digit:]+")), am_pm = as_factor(str_extract(shift_num_ampm, "am|pm")),
date = make_date(year = year, month = mon, day = day)) %>%
select(-d8:-mon)
# staff_complete_sample2 write_rds(staff_complete_long, file = paste(data_path,
# 'Rdata', 'staff_complete_long.Rdata', sep = '/'))
# write_csv(staff_complete_long, file = paste(data_path, 'csv_files',
# 'staff_complete_long.csv', sep = '/')) 3. View data frame structure
# generate_dictionary(staff_complete_long)
rmarkdown::paged_table(staff_complete_long)
net_all_xlnet_all_xl <- read_xlsx(paste(data_path, "networks", "network_allshifts.xlsx", sep = "/"))
str(net_all_xl)
## tibble [46,062 × 17] (S3: tbl_df/tbl/data.frame)
## $ i : num [1:46062] 34 34 34 34 34 34 34 34 35 35 ...
## $ any : num [1:46062] 1 1 1 1 1 1 1 1 1 1 ...
## $ staffi : num [1:46062] 0 0 0 0 0 0 0 0 0 0 ...
## $ idi : num [1:46062] 79200934 79200934 79200934 79200934 79200934 ...
## $ d8 : POSIXct[1:46062], format: "2009-07-09" "2009-07-09" ...
## $ H1N1 : num [1:46062] 0 0 0 0 0 0 0 0 0 0 ...
## $ quarter : num [1:46062] 1 1 1 1 1 1 1 1 1 1 ...
## $ shiftampm : num [1:46062] 2 2 2 2 2 2 2 2 2 2 ...
## $ d9 : num [1:46062] 5 5 5 5 5 5 5 5 5 5 ...
## $ edgeweight: num [1:46062] 0.0639 0.0806 0.2206 0.0706 0.0119 ...
## $ j : num [1:46062] 37 66 80 84 85 86 93 98 38 40 ...
## $ staffj : num [1:46062] 0 0 0 0 0 0 0 0 0 0 ...
## $ combo : num [1:46062] 0 0 0 0 0 0 0 0 0 0 ...
## $ idj : num [1:46062] 79200937 79200966 79200980 79200984 79200985 ...
## $ comboc : chr [1:46062] "0 patient-patient" "0 patient-patient" "0 patient-patient" "0 patient-patient" ...
## $ minutes : num [1:46062] 3.833 4.833 13.233 4.233 0.717 ...
## $ shiftnum : num [1:46062] 1 1 1 1 1 1 1 1 1 1 ...
net_all_xl %>%
rmarkdown::paged_table()
net_all_sasnet_all_sas <- read_sas(paste(data_path, "networks", "network_allshifts.sas7bdat",
sep = "/"))
# str(net_all_sas)
net_all_sas %>%
rmarkdown::paged_table()
generate_dictionary(net_all_sas)
edges_alledges_all <- read_sas(paste(data_path, "networks", "allshifts_edges.sas7bdat", sep = "/"))
# str(edges_all)
edges_all %>%
rmarkdown::paged_table()
edges2 <- read_sas(paste(data_path, "networks", "edges2.sas7bdat", sep = "/"))
rmarkdown::paged_table(edges2)
events_longevents_long <- read_sas(paste(data_path, "events", "eventfile_long2.sas7bdat", sep = "/"))
rmarkdown::paged_table(events_long)
# write_csv(events_long, file = paste(data_path, 'csv_files',
# 'events_long.csv', sep = '/'))
events_long has duplicates, use
eventfile_nodup if needed.
events_uniqueevents_unique <- read_sas(paste(data_path, "events", "events_uniquepersid2.sas7bdat",
sep = "/"))
rmarkdown::paged_table(events_unique)
eventfile_nodupeventfile_nodup <- read_sas(paste(data_path, "events", "eventfile_long6944_noduploc.sas7bdat",
sep = "/"))
eventfile_nodup %>%
rmarkdown::paged_table()
# write_csv(eventfile_nodup, file = paste(data_path, 'csv_files',
# 'eventfile_nodup.csv', sep = '/')) write_rds(eventfile_nodup, file =
# paste(data_path, 'Rdata', 'eventfile_nodup.Rdata', sep = '/'))
rdata_dir <- paste0(getwd(), "/Data/Rdata/")
# population.Rdata
population <- read_rds(paste0(rdata_dir, "population.Rdata"))
glimpse(population)
## Rows: 57,514
## Columns: 12
## $ med_rec_no <chr> "1173785", "870282", "1061988", "856823", "856823", "8…
## $ encounter_no <chr> "37829225", "38040069", "38229285", "38360060", "38369…
## $ arrival_date <date> 2009-08-13, 2010-03-10, 2009-10-12, 2010-03-01, 2009-…
## $ departure_date <date> 2009-08-13, 2010-03-11, 2009-10-12, 2010-03-01, 2009-…
## $ sex <fct> Female, Female, Male, Female, Female, Female, Male, Fe…
## $ age <dbl> 47, 49, 57, 44, 43, 44, 50, 50, 42, 42, 70, 70, 54, 90…
## $ race <fct> Black, Black, Black, Black, Black, Black, Black, Black…
## $ chief_complaint <chr> "Earache", "Assault", "Difficulty Breathing", "Seizure…
## $ acuity <fct> 4 Stable, 4 Stable, 2 Emergent, 3 Urgent, 2 Emergent, …
## $ arrival_mode <fct> EMS Ground, Private Vehicle, EMS Ground, Private Vehic…
## $ arrival_cat <fct> EMS, Ambulatory, EMS, Ambulatory, EMS, Ambulatory, Amb…
## $ disposition <fct> Discharge, Discharge, Admit, NA, Admit, Discharge, Dis…
# names(population) demographics.Rdata demographics <-
# read_rds(paste0(rdata_dir, 'demographics_clean.Rdata')) glimpse(demographics)
# pts_all.Rdata
pts_all <- read_rds(paste0(rdata_dir, "pts_all.Rdata"))
# glimpse(pts_all) names(pts_all)
test <- population %>%
full_join(pts_all, by = c("encounter_no", "sex", "age", "race", "chief_complaint",
"acuity", "arrival_mode", "arrival_cat", "disposition"))
glimpse(test)
## Rows: 58,215
## Columns: 35
## $ med_rec_no <chr> "1173785", "870282", "1061988", "856823", "856823", …
## $ encounter_no <chr> "37829225", "38040069", "38229285", "38360060", "383…
## $ arrival_date <date> 2009-08-13, 2010-03-10, 2009-10-12, 2010-03-01, 200…
## $ departure_date <date> 2009-08-13, 2010-03-11, 2009-10-12, 2010-03-01, 200…
## $ sex <fct> Female, Female, Male, Female, Female, Female, Male, …
## $ age <dbl> 47, 49, 57, 44, 43, 44, 50, 50, 42, 42, 70, 70, 54, …
## $ race <fct> Black, Black, Black, Black, Black, Black, Black, Bla…
## $ chief_complaint <chr> "Earache", "Assault", "Difficulty Breathing", "Seizu…
## $ acuity <fct> 4 Stable, 4 Stable, 2 Emergent, 3 Urgent, 2 Emergent…
## $ arrival_mode <fct> EMS Ground, Private Vehicle, EMS Ground, Private Veh…
## $ arrival_cat <fct> EMS, Ambulatory, EMS, Ambulatory, EMS, Ambulatory, A…
## $ disposition <chr> "Discharge", "Discharge", "Admit", NA, "Admit", "Dis…
## $ sid <chr> NA, NA, "002167be", NA, NA, NA, NA, NA, NA, NA, NA, …
## $ date <date> NA, NA, 2009-10-12, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ shift_num <chr> NA, NA, "96", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ shift_ampm <fct> NA, NA, am, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ encounter_no2 <chr> NA, NA, "38229285", NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pt_id <chr> NA, NA, "3", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ age2 <dbl> NA, NA, 57, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ arrival_dttm <dttm> NA, NA, 2009-10-12 03:49:59, NA, NA, NA, NA, NA, NA…
## $ arrival_tm <time> NA, NA, 03:50:00.000000, …
## $ departure_dttm <dttm> NA, NA, 2009-10-12 09:07:00, NA, NA, NA, NA, NA, NA…
## $ departure_tm <time> NA, NA, 09:07:00.000000, …
## $ time2tag <dbl> NA, NA, 197, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ start_date <dttm> NA, NA, 2009-10-12 07:15:00, NA, NA, NA, NA, NA, NA…
## $ end_date <dttm> NA, NA, 2009-10-12 19:00:00, NA, NA, NA, NA, NA, NA…
## $ los <dbl> NA, NA, 317.7333, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ los_minutes <dbl> NA, NA, 317, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ los_hours <dbl> NA, NA, 5.283333, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ job <chr> NA, NA, "PATIENT", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ approached <fct> NA, NA, Yes, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ participant <fct> NA, NA, Yes, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pt_with_data <fct> NA, NA, Yes, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ participant_final <fct> NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ nested_loc <list> <NULL>, <NULL>, [<tbl_df[6765 x 2]>], <NULL>, <NULL…
all_pts <- test %>%
unnest(cols = "nested_loc", keep_empty = TRUE) # %>%
# glimpse() pts_all == pts_nested
glimpse(all_pts)
## Rows: 62,044,002
## Columns: 36
## $ med_rec_no <chr> "1173785", "870282", "1061988", "1061988", "1061988"…
## $ encounter_no <chr> "37829225", "38040069", "38229285", "38229285", "382…
## $ arrival_date <date> 2009-08-13, 2010-03-10, 2009-10-12, 2009-10-12, 200…
## $ departure_date <date> 2009-08-13, 2010-03-11, 2009-10-12, 2009-10-12, 200…
## $ sex <fct> Female, Female, Male, Male, Male, Male, Male, Male, …
## $ age <dbl> 47, 49, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, …
## $ race <fct> Black, Black, Black, Black, Black, Black, Black, Bla…
## $ chief_complaint <chr> "Earache", "Assault", "Difficulty Breathing", "Diffi…
## $ acuity <fct> 4 Stable, 4 Stable, 2 Emergent, 2 Emergent, 2 Emerge…
## $ arrival_mode <fct> EMS Ground, Private Vehicle, EMS Ground, EMS Ground,…
## $ arrival_cat <fct> EMS, Ambulatory, EMS, EMS, EMS, EMS, EMS, EMS, EMS, …
## $ disposition <chr> "Discharge", "Discharge", "Admit", "Admit", "Admit",…
## $ sid <chr> NA, NA, "002167be", "002167be", "002167be", "002167b…
## $ date <date> NA, NA, 2009-10-12, 2009-10-12, 2009-10-12, 2009-10…
## $ shift_num <chr> NA, NA, "96", "96", "96", "96", "96", "96", "96", "9…
## $ shift_ampm <fct> NA, NA, am, am, am, am, am, am, am, am, am, am, am, …
## $ encounter_no2 <chr> NA, NA, "38229285", "38229285", "38229285", "3822928…
## $ pt_id <chr> NA, NA, "3", "3", "3", "3", "3", "3", "3", "3", "3",…
## $ age2 <dbl> NA, NA, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, …
## $ arrival_dttm <dttm> NA, NA, 2009-10-12 03:49:59, 2009-10-12 03:49:59, 2…
## $ arrival_tm <time> NA, NA, 03:50:00.000000, …
## $ departure_dttm <dttm> NA, NA, 2009-10-12 09:07:00, 2009-10-12 09:07:00, 2…
## $ departure_tm <time> NA, NA, 09:07:00, 09:07:00, 09:07:00, 0…
## $ time2tag <dbl> NA, NA, 197, 197, 197, 197, 197, 197, 197, 197, 197,…
## $ start_date <dttm> NA, NA, 2009-10-12 07:15:00, 2009-10-12 07:15:00, 2…
## $ end_date <dttm> NA, NA, 2009-10-12 19:00:00, 2009-10-12 19:00:00, 2…
## $ los <dbl> NA, NA, 317.7333, 317.7333, 317.7333, 317.7333, 317.…
## $ los_minutes <dbl> NA, NA, 317, 317, 317, 317, 317, 317, 317, 317, 317,…
## $ los_hours <dbl> NA, NA, 5.283333, 5.283333, 5.283333, 5.283333, 5.28…
## $ job <chr> NA, NA, "PATIENT", "PATIENT", "PATIENT", "PATIENT", …
## $ approached <fct> NA, NA, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,…
## $ participant <fct> NA, NA, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,…
## $ pt_with_data <fct> NA, NA, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,…
## $ participant_final <fct> NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ location_num <fct> NA, NA, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, …
## $ shift_second <dbl> NA, NA, 901, 902, 903, 904, 905, 906, 907, 908, 909,…
# staff_all == staff_locs_nested
staff_all <- read_rds(paste0(rdata_dir, "staff_all.Rdata"))
# glimpse(staff_all)
all_staff <- staff_all %>%
unnest(cols = nested_loc, keep_empty = TRUE)
glimpse(all_staff)
## Rows: 58,373,401
## Columns: 9
## Groups: sid, shift_num [2,218]
## $ sid <chr> "0002f4e2", "0002f4e2", "0002f4e2", "0002f4e2", "0002f…
## $ date <date> 2009-07-09, 2009-07-09, 2009-07-09, 2009-07-09, 2009-…
## $ shift_num <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
## $ shift_ampm <fct> pm, pm, pm, pm, pm, pm, pm, pm, pm, pm, pm, pm, pm, pm…
## $ location_num <fct> 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91…
## $ shift_second <dbl> 3622, 3623, 3624, 3625, 3626, 3627, 3628, 3629, 3630, …
## $ job_sah <fct> STAFF, STAFF, STAFF, STAFF, STAFF, STAFF, STAFF, STAFF…
## $ participant_cat <fct> STAFF, STAFF, STAFF, STAFF, STAFF, STAFF, STAFF, STAFF…
## $ jobtitle <fct> STAFF, STAFF, STAFF, STAFF, STAFF, STAFF, STAFF, STAFF…
# what file format do data need to for uploading to google cloud???
# write_rds(all_pts, file = 'all_pts.Rdata') write_rds(all_staff, file =
# 'all_staff.Rdata')
# directory path
rdata_dir <- "/Users/tommy-two/Documents/1_Research/2_Data_Science/0_Projects/1_NACI/Data/Rdata"
rdata_archive <- paste0(rdata_dir, "/rds_archive")
# rdata_dir
# WRANGLING----
## X--ntab2_final----
ntab2_final <- read_rds(paste0(rdata_dir, "/ntab2_final.Rdata"))
# view loaded data
ntab2_final %>%
glimpse()
# ntab2_clean <- ntab2_clean %>%
# mutate(shift_num = as.character(shift_num),
# shift_person = as.character(shift_person),
# sid = format(sid, scientific = FALSE))
# ntab2_clean <- ntab2_clean %>%
# relocate(shift_person, .after = id) %>%
# mutate(sid = str_trim(sid))
# ntab2_nms <- names(ntab2_clean)
# Duplicate sid for two patients in the same shift (shift 91; 10/07/2009):
# ntab2_final %>%
# mutate(sid = (sid)) %>%
# group_by(shift_num) %>%
# count(sid) %>%
# filter(n > 1)
# one duplicate sid from shift 91:
# ntab2_final %>%
# filter(shift_num == "91" & sid == "2.17E+04")
# id_sid_final %>%
# glimpse()
# ntab2_final %>%
# # filter(str_detect(sid, "^2")) %>%
# filter(shift_num == "8") %>%
# count(sid) %>%
# arrange(desc(n)) #%>%
# filter(str_detect(sid, "2\\.17"))
# id_sid_final %>%
# # glimpse()
# group_by(shift_num) %>%
# count(sid) %>%
# arrange(desc(n)) %>%
# filter(str_detect(sid, "217"))
# filter(str_detect(sid, "217"))
# mutate(staff = if_else(staff == 1, "Staff", "Patient"))
# write_rds(ntab2_clean, file = paste0(rdata_dir, "/ntab2_final.Rdata"))
# rm(ntab2_clean)
## X--id_sid_clean----
id_sid_clean <- read_rds(paste0(rdata_dir, "/id_sid_clean.Rdata"))
# view loaded data
# id_sid_clean %>%
# glimpse()
# compare to original file: "id_sid_matchup2.sas7bdat"
# idsid_matchup <- read_sas("Data/ids/id_sid_matchup2.sas7bdat")
id_sid_clean %>%
glimpse()
# idsid_matchup %>%
# glimpse()
# group_by(numshift) %>%
# count(newsid) %>%
# filter(n > 1)
# summarise(max(newsid))
id_sid_clean <- id_sid_clean %>%
rename(shift_ampm = am_pm,
shift_person = newsid) # %>%
# glimpse()
# write_rds(id_sid_clean, paste0(rdata_dir, "/id_sid_final.Rdata"))
# rm(id_sid_clean)
id_sid_clean %>%
group_by(shift_num) %>%
count(sid) %>%
arrange(desc(n))
# **ids_final ------
id_sid_final <- read_rds(paste0(rdata_dir, "/id_sid_final.Rdata"))
id_sid_final %>%
group_by(staff) %>%
count(shift_num) %>%
ungroup() %>%
pivot_wider(names_from = staff, values_from = n) %>%
transmute(`Shift Number` = as.numeric(shift_num),
`Patients (n)` = `0`,
`Staff (n)` = `1`) %>%
arrange(`Shift Number`) %>%
rownames_to_column() %>%
mutate(`Shift Number` = rowname,
.keep = "unused") %>%
relocate(`Shift Number`, `Patients (n)`, `Staff (n)`) %>%
mutate(`Total (n)` = `Patients (n)` + `Staff (n)`)
id_sid_final %>%
glimpse()
## **staff_sids----
# staff_sids <- read_csv(file = "Data/csv_files/staff_sids_clean.csv",
# col_names = c("sid", "job_sah", "participant_cat", "jobtitle"),
# col_types = "cfff",
# skip = 1)
# Read saved R object from file = "staff_sids_clean.Rdata"
staff_sids <- read_rds(paste0(rdata_dir, "/staff_sids-5_1_22.Rdata"))
staff_sids %>%
# glimpse()
count(sid) %>%
arrange(desc(n))
# write_rds(staff_sids, paste0(rdata_dir, "/staff_sids_final.Rdata"))
# rm(staff_sids)
# no duplicates in staff sid. 88 total staff members.
staff_sids_final <- read_rds(paste0(rdata_dir, "/staff_sids_final.Rdata"))
staff_sids_final %>%
glimpse()
## **pt_sids----
pt_sid_clean <- read_rds(paste0(rdata_dir, "/pt_sid_clean.Rdata"))
# view loaded data
shifts_vec <- pt_sid_clean %>%
mutate(shift_num = as.integer(shift_num)) %>%
arrange(shift_num) %>%
pull(shift_num) %>%
unique() %>%
as.character()
# set seed for reproducibility:
set.seed(711)
# select a random sample of
random_shifts <- sample(shifts_vec, 20)
# many duplicate sids:
pt_sid_clean %>%
count(sid) %>%
filter(n > 1) %>%
arrange(desc(n))
# no duplicate sids when grouped by shift:
pt_sid_clean %>%
group_by(shift_num) %>%
count(sid) %>%
arrange(desc(n))
# write_rds(pt_sid_clean, paste0(rdata_dir, "/pt_sid_final.Rdata"))
# rm(pt_sid_clean)
## **population (from demographics_clean)----
demographics_clean <- read_rds(paste0(rdata_dir, "/demographics_clean.Rdata"))
demographics_clean %>%
glimpse()
# saved as population.Rdata
# write_rds(demographics_clean, paste0(rdata_dir, "/population.Rdata"))
## **pts_pop_clean----
pts_population <- read_rds(paste0(rdata_archive, "/pts_population.Rdata"))
pts_pop_clean <- pts_population %>%
mutate(sid,
encounter_no = str_trim(EncounterNum),
encounter_no2 = as.character(encnum),
pt_id = as.character(ID),
date = d8,
shift_num = as.character(numshift),
shift_ampm = factor(shift_ampm),
sex = factor(Sex),
age = as.numeric(Patient_Age_at_Visit),
age2 = AGE,
race = factor(Race,
levels = c("Black",
"White",
"Hispanic",
"Other"),
exclude = c("Not Recorded", "1", "2", "3")),
chief_complaint = Chief_Complaint,
acuity = factor(Acuity,
levels = c("1 Immediate",
"2 Emergent",
"3 Urgent",
"4 Stable",
"5 Non Urgent"),
exclude = "Not Recorded"),
arrival_dttm = ED_Arrival_Timestamp,
arrival_tm = timeARR,
arrival_mode = factor(Arr_Mode, exclude = "Not Recorded"),
arrival_cat = fct_recode(arrival_mode,
"EMS" = "EMS Ground",
"EMS" = "EMS Air",
"Custody" = "Police/Jail",
"Ambulatory" = "Private Vehicle",
"Ambulatory" = "Public Trans",
"Ambulatory" = "Walk"),
departure_dttm = ED_Departure_Timestamp,
departure_tm = timeDEP,
time2tag = duration_UntilTag,
start_date = startd8time,
end_date = endd8time,
los = durationInED,
los_minutes = MinutesInED,
los_hours = hrsinED,
disposition = ED_Disposition,
job = JOB,
approached = factor(approached),
participant = factor(participantyn),
pt_with_data = factor(Pt_withData),
# levels = c("Yes" = 1,
# "No" = 0)),
participant_final = factor(Participant_final),
.keep = "none") %>%
mutate(across(where(is.factor), ~na_if(., ""))) %>%
mutate(across(where(is.character), ~na_if(., ""))) %>%
mutate(across(where(is.factor), ~na_if(., "Not Recorded"))) %>%
mutate(across(where(is.character), ~na_if(., "Not Recorded"))) %>%
relocate(shift_ampm, .after = date) %>%
relocate(arrival_cat, .after = arrival_mode) %>%
relocate(approached, .before = participant)
# check results
pts_pop_clean %>%
glimpse()
# filter(encounter_no %in% demographics_clean$encounter_no)
count(shift_num)
# filter(encounter_no %in% pts_pop_clean$encounter_no)
# write pts_pop to rds file:
# write_rds(pts_pop, paste0(rdata_dir, "/pts_pop_clean.Rdata"))
# rm(pts_pop)
## X--allstaff_clean----
allstaff_clean <- read_rds(paste0(rdata_dir, "/allstaff_clean.Rdata"))
allstaff_clean %>%
glimpse()
## **pt_complete----
# pt_complete <- read_rds(paste0(rdata_dir, "/pt_complete_long.Rdata"))
# view loaded data
# pt_comp <- pt_complete %>%
# mutate(shift_num = as.character(shift_num),
# day_night = NULL) %>%
# rename(shift_ampm = am_pm)
pt_comp_nested <- read_rds(paste0(rdata_dir, "/pt_comp_nested-5_2_22.Rdata"))
# create unnested version
pt_comp_unnested <- pt_comp_nested %>%
unnest(cols = c(data))
# view unnested version
glimpse(pt_comp_unnested)
# ungroup unnested df
pt_comp_unnested %>%
ungroup() %>%
count(date) %>%
arrange(date) %>%
plot(date, n)
# renest df by sid & shift_num only
pt_comp_nested <- pt_comp_unnested %>%
group_by(sid, shift_num) %>%
nest(nested_loc = c(location_num, shift_second)) # %>%
# check for duplicates:
# pt_comp has many duplicate sids:
pt_comp %>%
count(sid) %>%
filter(n > 1)
# pt_comp_nested has no duplicate sids
pt_comp_nested %>%
# glimpse() %>%
count(sid) %>%
filter(n > 1)
# no duplicates found
# glimpse(pt_comp_unnested)
glimpse(pt_comp_nested)
# write_rds(pt_comp_nested, file = paste0(rdata_dir, "/pt_locs_nested.Rdata"))
# rm(pt_comp_nested)
## **staff_complete----
staff_complete <- read_rds(paste0(rdata_dir, "/staff_complete_long.Rdata"))
staff_comp <- staff_complete %>%
mutate(shift_num = as.character(shift_num),
firstday = NULL) %>%
rename(shift_ampm = am_pm,
shift_second = seconds) %>%
relocate(sid, date, shift_num, shift_ampm, location_num, shift_second)
# exclude from staff_sids all rows with sid not in staff_complete
stf_comp_sids <- staff_complete %>%
distinct(sid)
stf_sids_sids <- staff_sids %>%
select(sid)
excluded_staff_sids <- staff_sids %>%
filter(!(stf_sids_sids$sid %in% stf_comp_sids$sid)) %>%
pull(sid)
staff_sids_88 <- staff_sids %>%
filter(!(sid %in% excluded_staff_sids))
# write_rds(staff_sids_88, file = paste0(rdata_dir, "/staff_sids-5_1_22.Rdata"))
# write_csv(staff_sids_88, file = paste0("Data/csv_files/staff_sids-5_1_22"))
staff_comp %>%
glimpse()
# staff_comp has many duplicated sid numbers, but not within shifts:
staff_comp %>%
count(sid) %>%
filter(n > 1)
staff_comp_nested <- staff_comp %>%
group_by(sid, shift_num) %>%
nest(nested_loc = c(location_num, shift_second)) # %>%
# staff_comp_nested has no duplicated sids
# the following table shows number of observed staff (n) per shift, totals of staff working per shift is not available
staff_comp_nested %>%
ungroup() %>%
count(shift_num) %>%
arrange(desc(n))
# filter(n > 1)
# to unnest staff_comp_nested, use:
staff_comp_nested %>%
unnest(cols = c(nested_loc))
# view glimpse() of staff_comp_nested:
staff_comp_nested %>%
glimpse()
write_rds(staff_comp_nested, paste0(rdata_dir, "/staff_locs_nested.Rdata"))
rm(staff_comp, staff_comp_nested)
rm(staff_complete)
## **events_clean----
# load eventfile_nodup.Rdata
events <- read_rds(paste0(rdata_dir, "/eventfile_nodup.Rdata"))
# number of staff: 88
# events %>%
# filter(ParticipantCat2 == "STAFF") %>%
# group_by(sid) %>%
# count() %>%
# ungroup() %>%
# summarise(n_staff = length(sid))
# clean events:
events_clean <- events %>%
mutate(sid = sid,
pt_id = patient,
encounter_no = str_trim(EncounterNum),
participant_cat = factor(ParticipantCat4),
job_cat = factor(job),
job_name = if_else(jobfullname == "", "Patient", jobfullname),
date = ymd(d8),
shift_num = shift_num,
shift_start_dttm = startd8time,
shift_end_dttm = endd8time,
tag_on_dttm = tag_starttime,
tag_off_dttm = tagofftime,
event_id = factor(event),
event_dttm = ymd_hms(et),
event_tm = ti,
room_index = locindex,
room_name = room,
.keep = "none",
.after = "sid") %>%
relocate(pt_id, date, shift_num, participant_cat, .after = encounter_no)
# write_rds(events_clean, paste0(rdata_dir, "/events_final.Rdata"))
# rm(events_clean, events)
pt_id_vec <- events_clean %>%
filter(pt_id %in% pts_pop_clean$id_no) %>%
pull(pt_id)
# c(pt_id_vec)
pt_id_vec <- pt_id_vec %>%
# group_by(pt_id) %>%
distinct(pt_id) %>%
pull(pt_id)
pts_pop_clean %>%
filter(id_no %in% pt_id_vec$pt_id)
events_clean %>%
filter(pt_id %in% pt_id_vec$pt_id)
events_cl_nested <- events_clean %>%
group_by(shift_num, sid, encounter_no, date) %>%
nest()
# JOINING DFs:-----
# Read saved data objects:
id_sid <- read_rds(paste0(rdata_dir, "/id_sid_final.Rdata"))
ntab2 <- read_rds(paste0(rdata_dir, "/ntab2_final.Rdata"))
pt_info <- read_rds(paste0(rdata_dir, "/pt_info_final.Rdata"))
pt_sids <- read_rds(paste0(rdata_dir, "/pt_sid_final.Rdata"))
pts_nested <- read_rds(paste0(rdata_dir, "/pt_locs_nested.Rdata"))
staff_nested <- read_rds(paste0(rdata_dir, "/staff_locs_nested.Rdata"))
staff_sids <- read_rds(paste0(rdata_dir, "/staff_sids_final.Rdata"))
events <- read_rds(paste0(rdata_dir, "/events_final.Rdata"))
## staff_all <- staff_nested + staff_sids =====
staff_all <- staff_nested %>%
left_join(staff_sids, by = "sid")
staff_all %>%
glimpse()
# write_rds(staff_all, paste0(rdata_dir, "/staff_all.Rdata"))
# pts_all <- pts_nested + pt_info----
pt_info %>%
glimpse()
# names(pts_nested)
# names(pt_info)
# test <- pts_nested %>%
# full_join(pt_info, by = c("sid",
# "date",
# "shift_num",
# "shift_ampm"))
# glimpse(test)
# pts_nested %>%
# glimpse()
# move list column to last position
pts_all <- pts_all %>%
ungroup() %>%
relocate(nested_loc, .after = participant_final)
pts_all %>%
glimpse()
# pts_all_unnested <- pts_all %>%
# unnest(cols = nested_loc) #%>%
# save pts_all_unnested as .csv & .Rdata files:
# write_csv(pts_all_unnested, paste0("Data/csv_files/pts_all_unnested.csv"))
# write_rds(pts_all, paste0(rdata_dir, "/pts_all.Rdata"))
# SLICES:----
sliced_pts <- pts_all %>%
unnest(cols = c(nested_loc)) %>%
group_by(sid, shift_num) %>%
slice_sample(prop = 0.005)
# sliced_pts %>%
# ungroup() %>%
# group_by(encounter_no)# %>%
# summarise(n_pts = n(),
# n_participants = sum(as.numeric(participant_final), na.rm = TRUE))
# sliced_pts %>%
# # glimpse()
# ungroup() %>%
# count(sid) %>%
# summarise(sliced_pts_n = n(), sliced_obs_n = sum(n))
# summarise(sum(n))
# SETUP ---- create path to data archive
rdata_dir <- "/Users/tommy-two/Documents/1_Research/2_Data_Science/0_Projects/1_NACI/Data/Rdata"
rdata_archive <- paste0(rdata_dir, "/rds_archive")
## load packages library(naniar) # for missing data library(lubridate)
library(labelled)
library(vtable) # good package for summary tables
library(tidyverse)
## import data sets
population_730 <- read_rds(paste0(rdata_dir, "/population.Rdata"))
n_patients_730 <- population_730 %>%
nrow()
# n_patients_730
population_81 <- read_rds(paste0(rdata_dir, "/pt_info_final.Rdata"))
n_patients_81 <- population_81 %>%
nrow()
# n_patients_81
# Make a table of patient & staff counts (n) per shift
# (shift_counts_table.csv)----
id_sid_final <- read_rds(paste0(rdata_dir, "/id_sid_final.Rdata"))
shift_counts_table <- id_sid_final %>%
group_by(staff) %>%
count(shift_num) %>%
ungroup() %>%
pivot_wider(names_from = staff, values_from = n) %>%
transmute(`Shift Number` = as.numeric(shift_num), `Patients (n)` = `0`, `Staff (n)` = `1`) %>%
arrange(`Shift Number`) %>%
rownames_to_column() %>%
mutate(`Shift Number` = rowname, .keep = "unused") %>%
relocate(`Shift Number`, `Patients (n)`, `Staff (n)`) %>%
mutate(`Total (n)` = `Patients (n)` + `Staff (n)`)
# write_csv(shift_counts_table, paste0(getwd(),
# '/Tables/shift_counts_table.csv'))
sessioninfo::session_info() %>%
details::details(summary = "Current session info", open = FALSE)
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.1.2 (2021-11-01)
os macOS Big Sur 10.16
system x86_64, darwin17.0
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2022-05-31
pandoc 2.17.1.1 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.0)
backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.0)
bit 4.0.4 2020-08-04 [1] CRAN (R 4.1.0)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.1.0)
broom 0.8.0 2022-04-13 [1] CRAN (R 4.1.2)
broom.helpers 1.7.0 2022-04-22 [1] CRAN (R 4.1.2)
bslib 0.3.1 2021-10-06 [1] CRAN (R 4.1.0)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.0)
cli 3.3.0 2022-04-25 [1] CRAN (R 4.1.2)
clipr 0.8.0 2022-02-22 [1] CRAN (R 4.1.2)
colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.1.2)
crayon 1.5.1 2022-03-26 [1] CRAN (R 4.1.2)
data.table * 1.14.2 2021-09-27 [1] CRAN (R 4.1.0)
DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.0)
dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.1.0)
desc 1.4.1 2022-03-06 [1] CRAN (R 4.1.2)
details 0.3.0 2022-03-27 [1] CRAN (R 4.1.2)
digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.0)
dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.1.2)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.2)
fansi 1.0.3 2022-03-24 [1] CRAN (R 4.1.2)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.1.0)
formatR 1.12 2022-03-31 [1] CRAN (R 4.1.2)
fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.0)
generics 0.1.2 2022-01-31 [1] CRAN (R 4.1.2)
ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.1.2)
glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.2)
gt 0.5.0 2022-04-21 [1] CRAN (R 4.1.2)
gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.0)
gtsummary * 1.6.0 2022-04-25 [1] CRAN (R 4.1.2)
haven * 2.5.0 2022-04-15 [1] CRAN (R 4.1.2)
hms 1.1.1 2021-09-26 [1] CRAN (R 4.1.0)
htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.0)
httr 1.4.3 2022-05-04 [1] CRAN (R 4.1.2)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.2)
kableExtra * 1.3.4 2021-02-20 [1] CRAN (R 4.1.0)
knitr * 1.39 2022-04-26 [1] CRAN (R 4.1.2)
labelled * 2.9.1 2022-05-05 [1] CRAN (R 4.1.2)
lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.0)
lubridate * 1.8.0 2021-10-07 [1] CRAN (R 4.1.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.1.2)
modelr 0.1.8 2020-05-19 [1] CRAN (R 4.1.0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.2)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
png 0.1-7 2013-12-03 [1] CRAN (R 4.1.0)
purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
readr * 2.1.2 2022-01-30 [1] CRAN (R 4.1.2)
readxl * 1.4.0 2022-03-28 [1] CRAN (R 4.1.2)
reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.0)
rlang 1.0.2 2022-03-04 [1] CRAN (R 4.1.2)
rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.1.2)
rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.1.2)
rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
rvest 1.0.2 2021-10-16 [1] CRAN (R 4.1.0)
sass 0.4.1 2022-03-23 [1] CRAN (R 4.1.2)
scales 1.2.0 2022-04-13 [1] CRAN (R 4.1.2)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.0)
stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.0)
stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
svglite 2.1.0 2022-02-03 [1] CRAN (R 4.1.2)
systemfonts 1.0.4 2022-02-11 [1] CRAN (R 4.1.2)
tibble * 3.1.7 2022-05-03 [1] CRAN (R 4.1.2)
tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.1.2)
tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.1.2)
tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.1.0)
tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.1.2)
utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.0)
vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.1.2)
viridisLite 0.4.0 2021-04-13 [1] CRAN (R 4.1.0)
vroom 1.5.7 2021-11-30 [1] CRAN (R 4.1.0)
webshot 0.5.3 2022-04-14 [1] CRAN (R 4.1.2)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.2)
xfun 0.30 2022-03-02 [1] CRAN (R 4.1.2)
xml2 1.3.3 2021-11-30 [1] CRAN (R 4.1.0)
yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2)
[1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
──────────────────────────────────────────────────────────────────────────────